Libraries¶

In [ ]:
import pandas as pd # Needed For Various Python Built-in Functions
from sklearn.model_selection import train_test_split # Used to Split the dataset into train_dataset and test_dataset
from sklearn.preprocessing import StandardScaler # Needed to ensures that all of the features are given equal importance during modeling
from sklearn.naive_bayes import MultinomialNB # Naive Bayes Model
from sklearn.neighbors import KNeighborsClassifier # KNN Model
from sklearn.tree import DecisionTreeClassifier # Decision Tree Model
from sklearn.linear_model import LogisticRegression # Logistic Regression
from sklearn.svm import SVC # SVM Model
from sklearn.metrics import confusion_matrix # Used for model evaluation, that means in depth details of testing and generates accuracy for the model
from sklearn.calibration import calibration_curve # Used to Plot the Prediction Result of a binary classifier Trained Model
import seaborn as sns # Used to Plot the Heatmap of the Co-Relation Diagram
import matplotlib.pyplot as plt # Used to declare the the figure or frame size

Load dataset¶

In [ ]:
data = pd.read_csv('/content/drive/MyDrive/DATASETS/MACHINE _LEARNING/ahsan.csv')
df = data

Data Pre-processing¶

Dataset Shape¶

In [ ]:
df.shape
Out[ ]:
(2308, 21)

Dataset Summary¶

In [ ]:
df.describe()
Out[ ]:
Year Rank Score Total Score Percentage Penalty
count 2308.000000 2265.000000 2275.000000 2308.000000 2275.000000 2190.000000
mean 2011.681976 53.217219 3.875604 11.132582 0.348523 655.082648
std 6.320118 33.887514 2.225798 1.608310 0.194726 391.896365
min 1999.000000 1.000000 0.000000 8.000000 0.000000 0.000000
25% 2007.000000 25.000000 2.000000 10.000000 0.200000 365.250000
50% 2012.000000 49.000000 4.000000 11.000000 0.330000 638.000000
75% 2017.000000 78.000000 5.000000 12.000000 0.500000 934.500000
max 2021.000000 139.000000 13.000000 15.000000 1.000000 1912.000000

Total Null Values¶

In [ ]:
df.isnull().sum()
Out[ ]:
Year                   0
Date                   0
Host                   0
City                   0
Venue                  0
Rank                  43
University             0
Country                0
Team                   2
Contestant 1         400
Contestant 2         400
Contestant 3         401
Gold                   0
Silver                 0
Bronze                 0
Honorable              0
Score                 33
Total                  0
Score Percentage      33
Penalty              118
Prize               2168
dtype: int64

All Data types¶

In [ ]:
df.dtypes
Out[ ]:
Year                  int64
Date                 object
Host                 object
City                 object
Venue                object
Rank                float64
University           object
Country              object
Team                 object
Contestant 1         object
Contestant 2         object
Contestant 3         object
Gold                   bool
Silver                 bool
Bronze                 bool
Honorable              bool
Score               float64
Total                 int64
Score Percentage    float64
Penalty             float64
Prize                object
dtype: object

Show all the categorical attributes¶

In [ ]:
cat_cols = [col for col in df.columns if df[col].dtype == 'object']
cat_cols
Out[ ]:
['Date',
 'Host',
 'City',
 'Venue',
 'University',
 'Country',
 'Team',
 'Contestant 1',
 'Contestant 2',
 'Contestant 3',
 'Prize']

Dataset Glimpse¶

In [ ]:
df.head()
Out[ ]:
Year Date Host City Venue Rank University Country Team Contestant 1 ... Contestant 3 Gold Silver Bronze Honorable Score Total Score Percentage Penalty Prize
0 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology 1.0 University of Waterloo Canada U Waterloo David Kennedy ... Viet-Trung Luu True False False False 6.0 8 0.75 948.0 World Champion
1 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology 2.0 Albert-Ludwigs-Universität Freiburg Germany Freiburg B Team NaN ... NaN False True False False 6.0 8 0.75 992.0 NaN
2 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology 3.0 St. Petersburg Institute of Fine Mechanics & O... Russia St. Petersburg IFMO Alexander Volkov ... Vladimir Lyovkin False False True False 6.0 8 0.75 1046.0 NaN
3 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology 4.0 University of Bucharest Romania UNIBUC Bucharest NaN ... NaN False False True False 6.0 8 0.75 1048.0 NaN
4 1999 1999-04-11 Netherlands Eindhoven Eindhoven University of Technology 5.0 Duke University United States Duke Blue Devils NaN ... NaN False False True False 6.0 8 0.75 1337.0 NaN

5 rows × 21 columns

Warning: Total number of columns (21) exceeds max_columns (20) limiting to first (20) columns.

Rename Problematic Columns¶

In [ ]:
df = df.rename(columns={'Score Percentage': 'Score_Percentage'})

Unique values¶

In [ ]:
for col in df.columns:
    print(f'Unique values for {col}:')
    print(df[col].unique())
    print('---')
Unique values for Year:
[1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
 2013 2014 2015 2016 2017 2018 2019 2020 2021]
---
Unique values for Date:
['1999-04-11' '2000-03-18' '2001-03-10' '2002-03-23' '2003-03-25'
 '2004-03-31' '2005-04-06' '2006-04-12' '2007-03-15' '2008-04-09'
 '2009-04-21' '2010-02-05' '2011-05-30' '2012-05-17' '2013-07-03'
 '2014-06-25' '2015-05-20' '2016-05-19' '2017-05-24' '2018-04-19'
 '2019-04-04' '2021-10-05' '2022-11-10']
---
Unique values for Host:
['Netherlands' 'United States' 'Canada' 'Czechia' 'China' 'Japan' 'Sweden'
 'Poland' 'Russia' 'Morocco' 'Thailand' 'Portugal' 'Bangladesh']
---
Unique values for City:
['Eindhoven' 'Orlando, FL' 'Vancouver, BC' 'Honolulu, HI'
 'Beverly Hills, CA' 'Prague' 'Shanghai' 'San Antonio, TX'
 'Urayasu, Chiba' 'Banff, AB' 'Stockholm' 'Harbin' 'Warsaw'
 'Saint Petersburg' 'Yekaterinburg' 'Marrakech' 'Phuket' 'Rapid City, SD'
 'Beijing' 'Porto' 'Moscow' 'Dhaka']
---
Unique values for Venue:
['Eindhoven University of Technology' 'Radisson Hotel Universal Orlando'
 'The Westin Bayshore' 'Hilton Hawaiian Village' 'The Beverly Hilton'
 'Municipal House (Obecni Dum)' 'Pudong Shangri-La Hotel'
 'Baylor University' 'Hilton Tokyo Bay' 'Fairmont Banff Springs Hotel'
 'KTH – Royal Institute of Technology' 'Harbin Engineering University'
 'The Peabody Orlando Hotelg' 'University of Warsaw'
 'Yubileyny Sports Palace' 'Palace of Team Sports (DIVS)'
 'Palmeraie Conference Center' 'Phuket Sports Arena'
 'Rushmore Plaza Civic Center'
 "Peking University's Khoo Teck Puat Gymnasium"
 'Alfândega Congress Centre' 'Moscow Manege'
 'International Convention City Bashundhara']
---
Unique values for Rank:
[  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.  14.
  15.  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.  26.  27.  28.
  29.  30.  31.  32.  33.  34.  35.  36.  37.  38.  39.  40.  41.  42.
  43.  44.  45.  46.  47.  48.  49.  50.  51.  52.  53.  54.  55.  56.
  57.  58.  59.  60.  61.  62.  63.  64.  65.  66.  67.  68.  69.  70.
  71.  72.  73.  74.  75.  76.  77.  78.  79.  80.  81.  82.  83.  84.
  85.  86.  87.  88.  89.  90.  nan  91.  92.  93.  94.  95.  96.  97.
  98.  99. 100. 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. 111.
 112. 113. 114. 115. 116. 117. 118. 119. 120. 121. 122. 123. 124. 125.
 126. 127. 128. 129. 130. 131. 132. 133. 134. 135. 136. 137. 138. 139.]
---
Unique values for University:
['University of Waterloo' 'Albert-Ludwigs-Universität Freiburg'
 'St. Petersburg Institute of Fine Mechanics & Optics'
 'University of Bucharest' 'Duke University'
 'California Polytechnic State University'
 'University of California at Berkeley' 'Harvard University'
 'St. Petersburg State University' 'National Taiwan University'
 'University of Warsaw' 'Carnegie Mellon University'
 'Poznan University of Technology' 'University of Alberta'
 'POLITEHNICA University of Bucharest' 'Tsinghua University'
 'University of Otago' 'Moscow State University'
 'National University of Singapore' 'Cornell University'
 'Comenius University' 'KAIST' 'Kyoto University' 'Ulm University'
 'Virginia Tech' 'Rijksuniversiteit Groningen' 'University of Arkansas'
 'Shanghai University' 'University of Virginia'
 'The University of Queensland' 'University of Toronto'
 'Belarusian State University' 'Macalester College'
 'Zhongshan (Sun Yat-sen) University' 'Universidade de São Paulo'
 'The Johns Hopkins University' 'Harvey Mudd College'
 'Universidad de Buenos Aires' 'Florida State University'
 'Ural State University' 'National Tsing Hua University'
 'Georgia Institute of Technology' 'University of Colorado at Boulder'
 'Bangladesh University of Engineering and Technology'
 'Universidade Federal de Pernambuco' 'University of Kentucky'
 'Texas Tech University' 'The University of Texas at Austin'
 'Stanford University' 'Waseda University' 'University of Utah'
 'University of Nebraska - Lincoln' 'Universidad de las Americas-Puebla'
 'Queensland University of Technology' 'University of Missouri-Rolla'
 'ITESM Campus Monterrey' 'Rochester Institute of Technology'
 'University of Scranton' 'Sam Houston State University'
 'Al Akhawayn University' 'ITESM Campus Estado de Mexico'
 'North South University' 'University of Melbourne'
 'Shanghai Jiao Tong University' 'The Chinese University of Hong Kong'
 'California Institute of Technology' 'Charles University in Prague'
 'University of Central Florida' 'University of Washington'
 'Massachusetts Institute of Technology' 'Novosibirsk State University'
 'South Ural State University' 'Universidad Politécnica de Madrid'
 'George Mason University' 'Iowa State University' 'Linköping University'
 'Rose-Hulman Institute of Technology' 'Universiteit Leiden'
 'University of California San Diego' 'University of Pretoria'
 'Universidad Simón Bolívar'
 'Institute of Informatics PMF Skopje - Macedonia'
 'Amirkabir University of Technology' 'Harding University'
 'Indian Institute of Technology - Kanpur'
 'National Taiwan Normal University' 'Sharif University of Technology'
 'South Dakota State University' 'The American University in Cairo'
 'The University of Waikato' 'University of Calgary'
 'University of Oklahoma' 'Seoul National University' 'Umeå University'
 'The University of New South Wales' 'University of Sydney'
 'University of Valladolid'
 'Institute of Computing - University of Campinas' 'Yonsei University'
 'University of Chicago' 'University of Witwatersrand'
 'Indian Institute of Technology - Delhi' 'University of Hong Kong'
 'University of Tartu' 'Luther College' 'George Washington University'
 'Ferdowsi University of Mashhad' 'Instituto Tecnológico de Ciudad Madero'
 'LeTourneau University' 'Nizhny Novgorod State University'
 'Polytechnic University Long Island Campus' 'Saratov State University'
 'Fudan University' 'KTH - Royal Institute of Technology'
 'University of Wisconsin - Madison' 'University of Oldenburg'
 'Petrozavodsk State University' 'The University of Tokyo'
 'Sofia University'
 'State University - Education Science Production Complex'
 "Queen's University" 'Ecole Polytechnique' "Saint Mary's University"
 'Faculty of Computers and Artificial Intelligence - Cairo University'
 'National Yang Ming Chiao Tung University'
 'Florida Institute of Technology'
 'American International University - Bangladesh'
 'University of Wisconsin - Parkside' 'Allameh Helli High School'
 'Ewha Womans University' 'Messiah College'
 'Nanyang Technological University' 'University of North Carolina'
 'Taras Shevchenko Kiev National University' 'University of Cape Town'
 'Zhejiang University' 'Tokyo Institute of Technology'
 'Marina Nayanova University of Samara'
 'Institute of Physics and Technology - NTU of Ukraine "KPI"'
 'Universidade Estadual de Campinas'
 'University of Minnesota - Twin Cities'
 'Indian Institute of Technology - Bombay'
 'University of New Brunswick at Fredericton' 'University of Erlangen'
 'East China University of Science and Technology'
 'University of Auckland' 'Louisiana State University'
 'University Of Illinois - Chicago'
 'Pontificia Universidade Catolica do Rio de Janeiro'
 'Vologda State Pedagogical University' 'University of Oregon'
 'Mississippi State University' 'Brigham Young University'
 'Universidad de Guanajuato' 'West Virginia University'
 'Perm State University' 'Izhevsk State Technical University'
 'Kyrgyz Russian Slavic University' 'Universitat Politècnica de Catalunya'
 'University of British Columbia' 'Universität Ulm'
 'Norwegian University of Science and Technology'
 'University of Michigan at Ann Arbor' 'Jagiellonian University in Krakow'
 'Donghua University' 'Illinois Institute of Technology'
 'Universidad de Palermo' 'Michigan Technological University'
 'Institute of Technology and Management - Gurgaon'
 'South Dakota School of Mines and Technology' 'Shippensburg University'
 'University of Wroclaw' 'Peking University'
 'Ufa State Aviation Technical University'
 'Information and Communications University'
 'Instituto Tecnológico de Aeronautica' 'Altai State Technical University'
 'Indian Institute of Technology - Madras' 'Simon Fraser University'
 'École Normale Supérieure de Lyon' 'New York University'
 'Rutgers University' 'Texas A&M University' 'Renmin University of China'
 'Benemérita Universidad Autónoma de Puebla'
 'Universidade do Estado do Rio de Janeiro'
 'Universidad Tecnológica de la Mixteca' 'Ateneo de Manila University'
 'University of Twente' 'University of Science and Technology of China'
 'Technische Universität München' 'Lund University' 'Fuzhou University'
 'Princeton University' 'DePaul University' 'Yaroslavl State University'
 'Vinnytsia National Technical University' 'University of Adelaide'
 'Universidad Nacional de Colombia'
 "Ecole Nationale Supérieure d'Ingénieurs de Caen"
 'University of Maryland' 'Washington University in St. Louis'
 'Universidad Nacional de Córdoba - FaMAF' 'SUNY Stony Brook'
 'Indian Institute of Information Technology - Allahabad'
 'Kazakh-British Technical University' 'Madras Institute of Technology'
 'Universidad Central de Venezuela' 'Binghamton University'
 'Sichuan University' 'Ho Chi Minh City University of Technology'
 'University of Dhaka' 'Rice University' 'Shahid Beheshti University'
 'Universidad Autónoma de Tlaxcala'
 'St. Petersburg National Research University of IT, Mechanics and Optics'
 'Twente University' 'The University of Texas at Dallas'
 'Stavropol State University' 'Johannes Kepler Universität Linz'
 'University of Illinois at Urbana-Champaign'
 'National Technical University "Kharkiv Polytechnic Institute"'
 'Xiamen University' 'Universidad Centro Occidental Lisandro Alvarado'
 'Hefei University of Technology'
 'University of Engineering and Technology - VNU'
 'Bina Nusantara University' 'National University of Defense Technology'
 'Saitama University' 'Xidian University' 'Vanderbilt University'
 'National Institute of Technology, Trichy' 'Northwestern University'
 'Universidad de Chile' 'Kazakh State National University'
 'Arab Academy for Science and Technology (Alexandria)'
 'University of Minnesota - Morris'
 'University of North Carolina at Chapel Hill' 'Mercer University'
 'Lviv National University' 'University of Zagreb'
 'Moscow Institute of Physics & Technology'
 'Helsinki University of Technology' 'University of Oxford'
 'Karlsruhe Institute of Technology (KIT)'
 'AGH University of Science and Technology'
 'University of Electronic Science and Technology of China'
 'Universidad Autónoma de Madrid' 'Shandong University'
 'Beijing Jiaotong University' 'Chennai Mathematical Institute'
 'Tianjin University' 'Universidad de los Andes, Colombia'
 'Instituto Militar de Engenharia' 'East West University'
 'Nanjing University of Aeronautics and Astronautics'
 'Beijing University of Posts and Telecommunications'
 'Vietnam National University - Ho Chi Minh City'
 'University of North Texas' 'Brown University'
 'Universidad de Guadalajara' 'Wuhan University'
 'Hong Kong University of Science and Technology' 'Griffith University'
 'University of Florida' 'East China Normal University'
 'Faculty of Engineering, Cairo University' 'Illinois State University'
 'Instituto Tecnológico de Culiacán' 'University of Delaware'
 'University of Saskatchewan' 'Vietnam National University - Hanoi'
 'Kim Chaek University of Technology' 'University of Tehran'
 'I. Javakhishvili Tbilisi State University'
 'South China University of Technology'
 'Universidad de Buenos Aires - FCEN'
 'University of Cambridge - Trinity College'
 'Huazhong University of Science & Technology'
 'Zhejiang University of Technology'
 'Taurida V.I. Vernadsky National University'
 'National Technical University of Ukraine "Kiev Polytechnic Institute"'
 'Universidade Federal do Paraná' 'Nanjing University'
 'German University in Cairo' 'University of Aizu'
 'Universidad Nacional de Colombia - Bogotá' 'University of Canterbury'
 'Universidad Nacional del Sur' 'University of Tasmania'
 'EAFIT University'
 'Indian Institute of Information Technology & Management, Gwalior'
 'International Institute of Information Technology - Hyderabad'
 'McGill University' 'Northwestern College' 'Purdue University'
 'Universidad Nacional de La Plata' 'Universidade Estadual do Ceará'
 'Universidade Federal de Santa Catarina'
 'Universidade Federal do Espirito Santo'
 'Universidade de São Paulo - Escola Politécnica'
 'National Research University Higher School of Economics'
 'Samara State Aerospace University'
 'University of São Paulo - Institute of Mathematics and Statistics'
 'Columbia University' 'École Normale Supérieure ULM'
 'Southeast University, China' 'University of Helsinki'
 'University of the Philippines - Diliman'
 'Friedrich-Alexander-University Erlangen-Nuremberg' 'Sogang University'
 'The University of Western Australia' 'Universidade Federal de Sergipe'
 'Harbin Institute of Technology' 'University of Rochester'
 'Guangdong University of Technology' 'ITESM Campus Queretaro'
 'DJ Sanghvi College of Engineering' 'South China Agricultural University'
 'National Chiao Tung University' 'Ho Chi Minh City University of Science'
 'The British University in Egypt' 'Hangzhou Dianzi University'
 'Universidad Central "Marta Abreu" de Las Villas'
 'American University of Beirut' 'Harbin Engineering University'
 'Donetsk National University' 'ETH Zürich' 'University of Stellenbosch'
 'Pontificia Universidad Católica del Perú'
 'California State University - Chico'
 'Faculty of Computer and Information Sciences, Ain Shams University'
 'Leiden University' 'University of Miami'
 'Universidade Federal de Minas Gerais'
 'Universidad de Guanajuato - CIMAT' 'Zhejiang Normal University'
 'Alexandria University - Faculty of Engineering'
 'American University of Sharjah' 'Universidad del Valle'
 'Universidad Católica Boliviana - La Paz'
 'Universidad de las Ciencias Informáticas' 'Universidad de La Habana'
 'King Abdullah University of Science and Technology'
 'Belarus State University of Informatics and Radioelectronics'
 'University of Latvia' 'Ural Federal University'
 'Delft University of Technology' 'Volgograd State Technical University'
 'Universidade Federal de Campina Grande'
 'Universidad Nacional de Ingeniería' 'University of Southern California'
 'Institut Teknologi Bandung' 'Tomsk State University'
 'Universidade Federal do Rio de Janeiro' 'Korea University'
 'Eindhoven University of Technology' 'Aalto University'
 'The University of Electro-Communications'
 'Instituto Tecnológico de Santo Domingo' 'Udmurt State University'
 'University of Tulsa'
 'Graduate University of the Chinese Academy of Sciences'
 'Shahjalal University of Science and Technology'
 'International Islamic University Malaysia' 'College of William and Mary'
 'ITESM Campus Puebla' 'Northeast Normal University' 'Beihang University'
 'Ludwig-Maximilians Universität München'
 'Beijing Institute of Technology'
 'V.N. Karazin Kharkiv National University'
 'Ningbo Institute of Technology, ZJU' 'Moscow Aviation Institute'
 'SungKyunKwan University' 'International IT University'
 'Indian Institute of Technology - Roorkee'
 'Saint Petersburg Academic University – Nanotechnology Research and Education Centre RAS'
 'Universidad Nacional de Ingeniería - FIIS'
 'Moscow State Institute of Steel and Alloys' 'University of Lethbridge'
 'Kaunas University of Technology' 'Universität Rostock'
 'Indian Institute of Technology - Indore'
 'Universidad Panamericana Campus Bonaterra' 'Tishreen University'
 'Universidad Tecnologica de Pereira'
 'Escuela Superior De Computo Instituto Politecnico Nacional'
 'Can Tho University' 'Universidade de São Paulo - Campus de São Carlos'
 'University of Connecticut' 'University of Manitoba'
 'The University of Texas at Brownsville'
 'National University of Science and Technology "MISiS"'
 'Jilin University' 'Moscow State University - Tashkent'
 'Beijing Normal University' 'University of Cambridge'
 'Anil Neerukonda Institute of Technology and Sciences'
 'University of Tsukuba' 'Nazarbayev University' 'FPT University'
 'University of Sciences DPR Korea' 'Hunan University'
 'Odessa National Mechnikov University' 'University of California Irvine'
 'Universidad Católica San Pablo' 'Universitaet des Saarlandes'
 'Universidad de Oriente - Sede Antonio Maceo' 'University of Indonesia'
 'Amrita University' 'Delhi Technological University'
 'National Sun-Yat-Sen University'
 'Princess Sumaya University for Technology'
 'Universidad Autónoma de Aguascalientes'
 'University of California Los Angeles' 'Zhejiang SCI-TECH University'
 'Jahangirnagar University' 'University of Copenhagen'
 'The Australian National University'
 'Kazan (Volga Region) Federal University' 'Free University of Tbilisi'
 'Facultad de Ciencias Exactas-Universidad Nacional de Rosario'
 'Belarusian State Economic University'
 'Indian Institute of Technology - Kharagpur'
 'Faculty of Engineering, Ain Shams University'
 'Federal University of Minas Gerais' 'Yazd University'
 'Amrita School of Engineering, Kollam'
 'Universidad Privada de Santa Cruz de la Sierra' 'Carleton College'
 'ENSA Marrakech, UCA' 'St. Petersburg ITMO University'
 'Belarusian State University of Informatics and Radioelectronics'
 'St. Petersburg Academic University' 'Kim Il Sung University'
 'Universidad Nacional de Rosario' 'Innopolis University'
 'Taras Shevchenko National University of Kyiv'
 'Moscow Engineering Physics Institute' 'Radboud University'
 'Zaporizhzhya National Technical University' 'Utrecht University'
 'Universidade Federal da Bahia'
 'Birla Institute of Technology & Science, Pilani Campus'
 'Osaka University' 'Instituto Tecnológico Autónomo de México'
 'Ningbo University' 'Aleppo University'
 'Northern (Arctic) Federal University' 'Imperial College London'
 'Ohio State University' 'Northeastern University (China)'
 'Chulalongkorn University' 'The University of South Dakota'
 'Damascus University' 'Islamic Azad University of Mashhad'
 'Prince of Songkla University' 'Tomsk Polytechnic University'
 'Uzhgorod National University' 'Universitas Indonesia'
 'École Normale Supérieure de Paris' 'Vilnius University'
 'Dhirubhai Ambani Institute of Information and Communication Technology, Gandhinagar'
 'Universidade Federal do Rio Grande do Norte' 'Yerevan State University'
 'Universidade Federal de Itajubá - Campus Itajubá'
 'Samara National Research University'
 'Faculty of Engineering (at Shoubra), Benha University'
 'TOBB University of Economics and Technology' 'Monash University'
 'Pontificia Universidad Católica del Perú - FCI' 'Keio University'
 'International Institute of Information Technology - Bangalore'
 'University of Pennsylvania'
 'National Institute of Technology Karnataka, Surathkul'
 'Facultad de Ciencias-Universidad Nacional Autónoma de México'
 'Cedarville University' 'Universidad Mayor de San Simon - Sistemas'
 'Universidad Autónoma de Nuevo Leon' 'Mount Allison University'
 'Al-Baath University' 'Universidad de Pinar del Río'
 'University of Management and Technology' 'Scuola Normale Superiore'
 'Ulsan National Institute of Science and Technology'
 'Indian Institute of Technology - Patna'
 'Northwestern Polytechnical University' 'University of Bergen'
 "Xi'an Jiaotong University" 'Colorado School of Mines'
 'Pontificia Universidad Católica Madre y Maestra - Campus Santo Domingo'
 'Higher Institute for Applied Sciences and Technology'
 'Universidad de Costa Rica' 'Karachi Institute of Economics & Technology'
 'University of Computer Studies, Yangon'
 'KimChaek University of Technology' 'Syrian Virtual University'
 'University of Niš - Faculty of Sciences and Mathematics'
 'Hanoi University of Science and Technology' 'Universidade do Porto'
 'University of Brasilia'
 'St. Petersburg Campus of Higher School of Economics'
 'American University of Beirut, Faculty of Arts and Science'
 'Alexandru Ioan Cuza University of Iași'
 'International Black Sea University' 'The University of Auckland'
 'Nanjing University of Science and Technology'
 'Universidad Nacional de Ingeniería - FC' 'IIIT-Delhi'
 'Faculty of Engineering- Mansoura University' 'Drexel University'
 'Universidad Tecnológica Nacional - Facultad Regional Santa Fe'
 'Lebanese American University'
 'Instituto Tecnológico Superior del Sur de Guanajuato'
 'Tecnológico de Costa Rica' 'Moscow Institute of Physics and Technology'
 'Kharkiv National University of Radio Electronics'
 'Utrecht - Leiden University' 'Faculty of Computer Science, Belgrade'
 'Indraprastha Institute of Information Technology'
 'Universidade de Brasília' 'Bilkent University'
 'BITS-Pilani, Hyderabad Campus'
 'Arab Academy for Science and Technology - Alexandria'
 'Università Degli Studi di Milano'
 'Netaji Subhash University Of Technology'
 'Indian Institute of Technology - Guwahati'
 'Kyungpook National University'
 'Ain Shams University - Faculty of Computer and Information Sciences'
 'Universidad Icesi' 'Indian Institute of Technology (ISM), Dhanbad'
 'Indian Institute of Technology - Varanasi'
 'Institute of Computing - Federal University of Amazonas'
 'Instituto de Informática - UFG'
 'National Research Nuclear University MEPhI (Moscow Engineering Physics Institute)'
 'American University - Central Asia'
 'Universidade Federal do Ceará - Campus Quixadá'
 'Universidad de Guanajuato - DCNE' 'The University of Jordan'
 'Cairo University - Faculty of Computers and Artificial Intelligence'
 'Institut National des Sciences Appliquées et de Technologie'
 'Avicenna University' 'NU-FAST Karachi' 'The University of Asia Pacific'
 'Swarthmore College' 'UNSW Sydney' 'University of Science, VNU-HCM'
 'Karlsruhe Institute of Technology'
 'Belarusian National Technical University'
 'Universidad de Guadalajara CUCEI' 'Northeastern University'
 'Birla Institute of Technology and Science, Pilani Campus'
 'Al-Azhar University' 'Ain Shams University - Faculty of Engineering'
 'Yale University' 'Suleyman Demirel University'
 'University of Wisconsin-Madison' 'El Shorouk Academy'
 'Adama Science and Technology University' 'University of Asia Pacific'
 'Universidad Mayor de San Simón'
 'Rajshahi University of Engineering & Technology'
 'The University of British Columbia'
 'Cairo University - Faculty of Engineering'
 'Arab Academy for Science and Technology - Cairo'
 'FAST Institute of Computer Science'
 'University of Chinese Academy of Sciences']
---
Unique values for Country:
['Canada' 'Germany' 'Russia' 'Romania' 'United States' 'Taiwan' 'Poland'
 'China' 'New Zealand' 'Singapore' 'Slovakia' 'South Korea' 'Japan'
 'Netherlands' 'Australia' 'Belarus' 'Brazil' 'Argentina' 'Bangladesh'
 'Mexico' 'Morocco' 'Hong Kong' 'Czechia' 'Spain' 'Sweden' 'South Africa'
 'Venezuela' 'North Macedonia' 'Iran' 'India' 'Egypt' 'Estonia' 'Bulgaria'
 'France' 'Ukraine' 'Kyrgyzstan' 'Norway' 'Philippines' 'Colombia'
 'Kazakhstan' 'Viet Nam' 'Austria' 'Indonesia' 'Chile' 'Croatia' 'Finland'
 'United Kingdom' 'North Korea' 'Georgia' 'Cuba' 'Lebanon' 'Switzerland'
 'Peru' 'United Arab Emirates' 'Bolivia' 'Saudi Arabia' 'Latvia'
 'Dominican Republic' 'Malaysia' 'Lithuania' 'Syria' 'Uzbekistan' 'Jordan'
 'Denmark' 'Thailand' 'Armenia' 'Turkey' 'Pakistan' 'Italy' 'Costa Rica'
 'Myanmar' 'Serbia' 'Portugal' 'Tunisia' 'Afghanistan' 'Ethiopia']
---
Unique values for Team:
['U Waterloo' 'Freiburg B Team' 'St. Petersburg IFMO' ... 'UTP - Lucas'
 'Chayotes UCR' 'Retired Masters']
---
Unique values for Contestant 1:
['David Kennedy' nan 'Alexander Volkov' ... 'Jhon Alex Gaviria Tobón'
 'César Herrera Garro' 'Tianle Chen']
---
Unique values for Contestant 2:
['Ondrej Lhotak' nan 'Matvey Kazakov' ... 'Juan Camilo Palacios Galvis'
 'Kevin Coto' 'Xuandeng Fu']
---
Unique values for Contestant 3:
['Viet-Trung Luu' nan 'Vladimir Lyovkin' ... 'Syed Muhammad Ali Mustafa'
 'Omar Tarek' 'Yunhan Shen']
---
Unique values for Gold:
[ True False]
---
Unique values for Silver:
[False  True]
---
Unique values for Bronze:
[False  True]
---
Unique values for Honorable:
[False  True]
---
Unique values for Score:
[ 6.  5.  4.  3.  2.  1.  0.  7.  9.  8. nan 10. 13. 11. 12.]
---
Unique values for Total:
[ 8  9 10 11 12 13 15]
---
Unique values for Score_Percentage:
[0.75 0.62 0.5  0.38 0.25 0.12 0.   0.88 0.67 0.56 0.44 0.33 0.22 0.11
 0.9  0.8  0.7  0.6  0.4  0.3  0.2  0.1  1.   0.73 0.64 0.55 0.45 0.36
 0.27 0.18 0.09 0.82  nan 0.58 0.42 0.17 0.08 0.91 0.85 0.77 0.69 0.54
 0.46 0.31 0.23 0.15 0.83 0.53 0.47 0.13 0.07 0.92]
---
Unique values for Penalty:
[ 948.  992. 1046. ...  116.  124.  131.]
---
Unique values for Prize:
['World Champion' nan 'World Champion, Europe Champion'
 'South Pacific Champion' 'North America Champion' 'Asia Champion'
 'Latin America Champion' 'Africa and the Middle East Champion'
 'World Champion, Asia Champion' 'Europe Champion'
 'World Champion, Northern Eurasia Champion' 'Asia East Champion'
 'Asia Pacific Champion' 'Asia West Champion'
 'World Champion, North America Champion' 'Northern Eurasia Champion']
---

Mapping the categorical data to numarical¶

In [ ]:
df['Host'] = df['Host'].map({'Netherlands': 14, 'United States': 5, 'Canada' : 1, 'Czechia' : 23, 'China' : 8, 'Japan' : 13, 'Sweden' : 25, 'Poland' : 7, 'Russia' : 3, 'Morocco' : 21, 'Thailand' : 65, 'Portugal' : 73, 'Bangladesh' : 19})
df['Country'] = df['Country'].map({'Canada': 1, 'Germany': 2, 'Russia': 3, 'Romania': 4, 'United States': 5, 'Taiwan': 6, 'Poland': 7, 'China': 8, 'New Zealand': 9, 'Singapore': 10, 'Slovakia': 11, 'South Korea': 12, 'Japan': 13, 'Netherlands': 14, 'Australia': 15,
    'Belarus': 16, 'Brazil': 17, 'Argentina': 18, 'Bangladesh': 19, 'Mexico': 20, 'Morocco': 21,'Hong Kong': 22,'Czechia': 23,'Spain': 24,'Sweden': 25,'South Africa': 26,'Venezuela': 27,'North Macedonia': 28,'Iran': 29,
    'India': 30,'Egypt': 31,'Estonia': 32,'Bulgaria': 33,'France': 34,'Ukraine': 35,'Kyrgyzstan': 36,'Norway': 37,'Philippines': 38,'Colombia': 39,'Kazakhstan': 40,'Viet Nam': 41,'Austria': 42,'Indonesia': 43, 'Chile': 44, 'Croatia': 45,
    'Finland': 46,'United Kingdom': 47,'North Korea': 48, 'Georgia': 49, 'Cuba': 50,'Lebanon': 51,
    'Switzerland': 52,'Peru': 53, 'United Arab Emirates': 54, 'Bolivia': 55, 'Saudi Arabia': 56,'Latvia': 57,'Dominican Republic': 58,'Malaysia': 59, 'Lithuania': 60,'Syria': 61, 'Uzbekistan': 62,'Jordan': 63,'Denmark': 64,'Thailand': 65,
    'Armenia': 66,'Turkey': 67,'Pakistan': 68,'Italy': 69,'Costa Rica': 70,'Myanmar': 71,'Serbia': 72,'Portugal': 73,'Tunisia': 74,'Afghanistan': 75,'Ethiopia': 76,
})
df['Gold'] = df['Gold'].astype(int)
df['Silver'] = df['Silver'].astype(int)
df['Bronze'] = df['Bronze'].astype(int)
df['Honorable'] = df['Honorable'].astype(int)
df['Prize'] = df['Prize'].map({'World Champion': 1,'World Champion, Europe Champion': 2,'South Pacific Champion': 3,'North America Champion': 4,'Asia Champion': 5,'Latin America Champion': 6,'Africa and the Middle East Champion': 7,'World Champion, Asia Champion': 8,
                               'Europe Champion': 9,'World Champion, Northern Eurasia Champion': 10,'Asia East Champion': 11,'Asia Pacific Champion': 12,'Asia West Champion': 13,'World Champion, North America Champion': 14,'Northern Eurasia Champion': 15}).fillna(0)
df.head()
Out[ ]:
Year Date Host City Venue Rank University Country Team Contestant 1 ... Contestant 3 Gold Silver Bronze Honorable Score Total Score_Percentage Penalty Prize
0 1999 1999-04-11 14 Eindhoven Eindhoven University of Technology 1.0 University of Waterloo 1 U Waterloo David Kennedy ... Viet-Trung Luu 1 0 0 0 6.0 8 0.75 948.0 1.0
1 1999 1999-04-11 14 Eindhoven Eindhoven University of Technology 2.0 Albert-Ludwigs-Universität Freiburg 2 Freiburg B Team NaN ... NaN 0 1 0 0 6.0 8 0.75 992.0 0.0
2 1999 1999-04-11 14 Eindhoven Eindhoven University of Technology 3.0 St. Petersburg Institute of Fine Mechanics & O... 3 St. Petersburg IFMO Alexander Volkov ... Vladimir Lyovkin 0 0 1 0 6.0 8 0.75 1046.0 0.0
3 1999 1999-04-11 14 Eindhoven Eindhoven University of Technology 4.0 University of Bucharest 4 UNIBUC Bucharest NaN ... NaN 0 0 1 0 6.0 8 0.75 1048.0 0.0
4 1999 1999-04-11 14 Eindhoven Eindhoven University of Technology 5.0 Duke University 5 Duke Blue Devils NaN ... NaN 0 0 1 0 6.0 8 0.75 1337.0 0.0

5 rows × 21 columns

Warning: Total number of columns (21) exceeds max_columns (20) limiting to first (20) columns.
In [ ]:
df.dtypes
Out[ ]:
Year                  int64
Date                 object
Host                  int64
City                 object
Venue                object
Rank                float64
University           object
Country               int64
Team                 object
Contestant 1         object
Contestant 2         object
Contestant 3         object
Gold                  int64
Silver                int64
Bronze                int64
Honorable             int64
Score               float64
Total                 int64
Score_Percentage    float64
Penalty             float64
Prize               float64
dtype: object
In [ ]:
df.isnull().sum()
Out[ ]:
Year                  0
Date                  0
Host                  0
City                  0
Venue                 0
Rank                 43
University            0
Country               0
Team                  2
Contestant 1        400
Contestant 2        400
Contestant 3        401
Gold                  0
Silver                0
Bronze                0
Honorable             0
Score                33
Total                 0
Score_Percentage     33
Penalty             118
Prize                 0
dtype: int64

Fix Missing Values by using the previous values¶

In [ ]:
df.fillna(method='ffill', inplace=True)
df.isnull().sum()
Out[ ]:
Year                0
Date                0
Host                0
City                0
Venue               0
Rank                0
University          0
Country             0
Team                0
Contestant 1        0
Contestant 2        0
Contestant 3        0
Gold                0
Silver              0
Bronze              0
Honorable           0
Score               0
Total               0
Score_Percentage    0
Penalty             0
Prize               0
dtype: int64

Co-Relationship¶

Pair Plot¶

In [ ]:
sns.set(style='ticks')
sns.pairplot(df, vars=['Year', 'Host', 'Rank', 'Country', 'Gold', 'Silver', 'Bronze', 'Honorable', 'Score', 'Total', 'Score_Percentage', 'Penalty', 'Prize'], kind='scatter')
plt.show()
In [ ]:
sns.set(style='ticks')
sns.pairplot(df, vars=['Year', 'Host', 'Rank', 'Country', 'Gold', 'Silver', 'Bronze', 'Honorable', 'Score', 'Total', 'Score_Percentage', 'Penalty', 'Prize'], kind='reg')
plt.show()

Heatmap¶

In [ ]:
corr_matrix = data.corr() # create a correlation matrix
plt.figure(figsize=(12, 12))
cmap = sns.diverging_palette(10, 220, as_cmap=True) # define a custom diverging color palette with red for negative values and blue for positive values
sns.heatmap(corr_matrix, annot=True, cmap=cmap, linewidths=0.1, linecolor='white', fmt='.2f', center=0, square=True, cbar=True, cbar_kws={'orientation': 'horizontal'}) # create a heatmap with colorbar and annotations
<ipython-input-52-ab84809d8d7a>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr_matrix = data.corr() # create a correlation matrix
Out[ ]:
<Axes: >

Feature Selection¶

In [ ]:
# Separate the features and target variable (## Rank Gold Silver Honorable Score Score_Percentage Penalty)
y = df['Prize']
X = df.drop(['Year', 'Date', 'Host', 'City', 'Venue', 'University', 'Country', 'Team', 'Contestant 1', 'Contestant 2', 'Contestant 3', 'Bronze', 'Total', 'Prize'], axis=1)

Dataset Splitting¶

In [ ]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 80% Training and 20% Testing
# Preprocess the data by scaling the features
scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

Model Training¶

Naive Bayes¶

In [ ]:
# Train a Multinomial Naive Bayes classifier on the training set
clf = MultinomialNB()
clf.fit(X_train, y_train)

# Predict the target values for the test set
y_pred = clf.predict(X_test)

# Evaluate the performance of the classifier on the test set
Naive_Bayes_Accuracy = clf.score(X_test, y_test)
print("Naive Bayes Accuracy:", Naive_Bayes_Accuracy*100,"%")
Naive Bayes Accuracy: 63.20346320346321 %

Knn¶

In [ ]:
knn = KNeighborsClassifier(n_neighbors=5)

# Fitting the model with training data
knn.fit(X_train, y_train)

# Predicting the class labels for testing data
y_pred = knn.predict(X_test)

# Evaluating the model performance
KNN_Accuracy = knn.score(X_test, y_test)
print("Accuracy:", KNN_Accuracy*100,"%")
Accuracy: 94.58874458874459 %

Decision Tree¶

In [ ]:
# Creating the Decision Tree model
dt = DecisionTreeClassifier()

# Fitting the model with training data
dt.fit(X_train, y_train)

# Predicting the class labels for testing data
y_pred = dt.predict(X_test)

# Evaluating the model performance
Decision_Tree_Accuracy = dt.score(X_test, y_test)
print("Accuracy:", Decision_Tree_Accuracy*100, "%")
Accuracy: 90.6926406926407 %

Logistic Regression¶

In [ ]:
# Creating the Logistic Regression model
lr = LogisticRegression()

# Fitting the model with training data
lr.fit(X_train, y_train)

# Predicting the class labels for testing data
y_pred = lr.predict(X_test)

# Evaluating the model performance
Logistic_Regression_Accuracy = lr.score(X_test, y_test)
print("Accuracy:", Logistic_Regression_Accuracy*100, "%")
Accuracy: 94.8051948051948 %
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

SVM¶

In [ ]:
# Creating the SVM model
svm = SVC()

# Fitting the model with training data
svm.fit(X_train, y_train)

# Predicting the class labels for testing data
y_pred = svm.predict(X_test)

# Evaluating the model performance
SVM_Accuracy = svm.score(X_test, y_test)
print("Accuracy:", SVM_Accuracy*100, "%")
Accuracy: 94.8051948051948 %